In [1]:

    
from configparser import ConfigParser
from os.path import join
from os import pardir

Configurar las credenciales para acceder a la API de Twitter



In [2]:

    
# The credentials file lives one directory up from the working directory,
# inside `src`.
credentials_path = join(pardir, 'src', 'credentials.ini')

config = ConfigParser()
# `read` returns the list of files it managed to parse; keeping the call as the
# cell's last expression makes the notebook display that list.
config.read(credentials_path)









    Out[2]:





['../src/credentials.ini']



In [3]:

    
# Look up the `twitter` section once and copy its four entries into
# module-level constants used when authenticating.
twitter_credentials = config['twitter']

APP_KEY = twitter_credentials['app_key']
APP_SECRET = twitter_credentials['app_secret']
OAUTH_TOKEN = twitter_credentials['oauth_token']
OAUTH_TOKEN_SECRET = twitter_credentials['oauth_token_secret']



In [4]:

    
from twitter import oauth, Twitter, TwitterHTTPError

Esta es la molona librería que vamos a utilizar: https://github.com/sixohsix/twitter/tree/master



In [5]:

    
# Build the OAuth credentials object. Arguments are passed positionally in the
# order: user token, user token secret, application key, application secret.
auth = oauth.OAuth(
    OAUTH_TOKEN,
    OAUTH_TOKEN_SECRET,
    APP_KEY,
    APP_SECRET,
)

# API client used by every request in the rest of the notebook.
twitter_api = Twitter(auth=auth)

# NOTE(review): presumably this makes the client retry failed / rate-limited
# requests automatically -- confirm against the library documentation.
twitter_api.retry = True

1. Recoger un tweet a partir de su id



In [6]:

    
# Fetch a single status, identified by its id (passed as a string).
tweet = twitter_api.statuses.show(
    _id='628949369883000832',
)



In [7]:

    
# Display the text of the status fetched by id.
tweet['text']









    Out[7]:





"dear @Microsoft the newOoffice for Mac is great and all, but no Lync update? C'mon."

2. Recoger tweets de una usuaria



In [8]:

    
# Request the timeline of the "femfreq" account, asking for 100 tweets.
femfreq_tweet_search = twitter_api.statuses.user_timeline(
    screen_name="femfreq",
    count=100,
)



In [9]:

    
# Display the profile description of the author of the first returned tweet.
femfreq_tweet_search[0]['user']['description']









    Out[9]:





'Feminist Frequency is an educational nonprofit working for a more equitable media landscape and online world. Created by Anita Sarkeesian.'



In [10]:

    
# Display the text of the last tweet in the returned timeline.
femfreq_tweet_search[-1]['text']









    Out[10]:





"Time is running out! Just 4 days left to back Ordinary Women, it doesn't happen without you! https://t.co/gWbfnMQOcp https://t.co/3FJIs5Bozy"

3. Recoger tweets a partir de una consulta



In [11]:

    
# Run a search for the "#feminazi" hashtag, asking for 100 results.
tweets = twitter_api.search.tweets(
    q="#feminazi",
    count=100,
)



In [12]:

    
# Display the metadata that came back with the search response
# (the matching tweets themselves are under the 'statuses' key).
tweets['search_metadata']









    Out[12]:





{'completed_in': 0.121,
 'count': 100,
 'max_id': 723548002791510017,
 'max_id_str': '723548002791510017',
 'next_results': '?max_id=722730474121084927&q=%23feminazi&count=100&include_entities=1',
 'query': '%23feminazi',
 'refresh_url': '?since_id=723548002791510017&q=%23feminazi&include_entities=1',
 'since_id': 0,
 'since_id_str': '0'}



In [13]:

    
import pandas as pd

# Pull the text out of every status returned by the search.
text_gathered = [status['text'] for status in tweets['statuses']]
num_tweets = len(text_gathered)

# One row per tweet; the `troll_tag` column is initialised to False for every
# row.
# NOTE(review): presumably the tag is meant to be filled in later -- confirm.
tweet_columns = {
    'tweet_text': text_gathered,
    'troll_tag': [False] * num_tweets,
}
pd_tweets = pd.DataFrame(tweet_columns)



In [14]:

    
# Preview the first rows of the DataFrame.
pd_tweets.head()









    Out[14]:






  
    
      
      troll_tag
      tweet_text
    
  
  
    
      0
      False
      @1800flowers Ads pay 4 Limbaugh to call us str...
    
    
      1
      False
      Stop the spread of cultural marxism. #Feminazi...
    
    
      2
      False
      #VivasNosQueremos\n#NantzinVive en mi corazón\...
    
    
      3
      False
      RT @jalgete: Todo es "violencia de género" y "...
    
    
      4
      False
      RT @jalgete: Todo es "violencia de género" y "...



In [15]:

    
# Write the DataFrame to `maybe_troll.csv`; the path is relative, so the file
# is created in the current working directory.
pd_tweets.to_csv('maybe_troll.csv')



In [16]:

    
ls









    



0. Gather data.ipynb  maybe_troll.csv

	troll_tag	tweet_text
0	False	@1800flowers Ads pay 4 Limbaugh to call us str...
1	False	Stop the spread of cultural marxism. #Feminazi...
2	False	#VivasNosQueremos\n#NantzinVive en mi corazón\...
3	False	RT @jalgete: Todo es "violencia de género" y "...
4	False	RT @jalgete: Todo es "violencia de género" y "...